Save the issues!

Get issues from GitHub repositories and save them into a file. I'll follow this Kaggle competition data format because it's similar.


In [1]:
# Personal access token (GitHub)
TOKEN = None
with open("github.token", "r") as gt:
    TOKEN = gt.read()
USER = "aaossa"

In [2]:
from collections import namedtuple


# Repositories
# Note: Use NamedTuple [1] "if you were going to create a
# bunch of instances of a class [...] and not change the 
# attributes after you them in __init__ [..]" [2]
# [1]: https://docs.python.org/3/library/collections.html#collections.namedtuple
# [2]: http://stackoverflow.com/a/9872434/3281097
Repository = namedtuple("Repository", ["owner", "repo"])

REPOS = list()
REPOS.append(Repository(owner="IIC2233-2015-1", repo="syllabus"))
REPOS.append(Repository(owner="IIC2233-2015-2", repo="syllabus"))
REPOS.append(Repository(owner="IIC2233-2016-1", repo="syllabus"))
REPOS.append(Repository(owner="IIC2233-2016-02", repo="Syllabus"))

# Issues
Issue = namedtuple("Issue", ["number", "title", "body", "labels", "url"])

In [3]:
import re
from requests import Session


# Maybe should be 'page=(\d+)[^?]+?>; rel="last"' (not tested)
PAGE_REGEX = re.compile('page=(\d+)(&state=all)?>; rel="last"')
ROOT = "https://api.github.com"
SESSION = Session()
SESSION.auth = (USER, TOKEN)

In [4]:
def get_issues_for_repository(repository, session=SESSION):
    """
    [IN] repository <Repository>: Repository objet with owner and name of the repo
    [IN] session <requests.Session> (opt): Session object used to send requests to the API
    
    Asks for every page of issues and returns a list with every issue
    in the repository.
     
    [OUT] issues <map<Issue>>: List of issues (Issue), each one with its number, title
                    body, labels and url.
    """
    issues = list()
    # List issues for each repository [1]
    # [1]: https://developer.github.com/v3/issues/#list-issues-for-a-repository
    endpoint_url = "{root}/repos/{owner}/{repo}/issues"
    endpoint_url = endpoint_url.format(root=ROOT, owner=repository.owner, repo=repository.repo)

    # Traversing with Pagination [2]
    # [2]: https://developer.github.com/guides/traversing-with-pagination/#basics-of-pagination
    req = session.get(endpoint_url, params={"page": 1, "state": "all"})
    _ = PAGE_REGEX.search(req.headers.get("link"))
    number_of_pages = 1 if _ is None else int(_.group(1))
    issues += process_issues(req.json())
    for page in range(2, number_of_pages + 1):
        req = session.get(endpoint_url, params={"page": page, "state": "all"})
        issues += process_issues(req.json())
    return issues


def process_issues(issues_page):
    """
    [IN] issues_page <list>: List from the response (json) given by the API
    
    Filters PRs and create an Issue object from each dictionary returned by the API
    
    [OUT] processed_issues<map<Issue>>: A map object with every issue (Issue), ignoring PRs
    """
    # Ignore pull requests (PR)
    filtered_issues = filter(lambda issue: "pull_request" not in issue.keys(), issues_page)
    # Convert each issue dictionary in a Issue namedtuple
    processed_issues = map(lambda issue: Issue(number=issue.get("number"),
                                                    title=issue.get("title"),
                                                    body=issue.get("body"),
                                                    labels=list(map(lambda label: label.get("name"), issue.get("labels"))),
                                                    url=issue.get("url")), filtered_issues)
    return processed_issues

In [5]:
# TODO: Use PEP257 [1]
# [1]: https://www.python.org/dev/peps/pep-0257/
help(get_issues_for_repository)


Help on function get_issues_for_repository in module __main__:

get_issues_for_repository(repository, session=<requests.sessions.Session object at 0x000000000595DD30>)
    [IN] repository <Repository>: Repository objet with owner and name of the repo
    [IN] session <requests.Session> (opt): Session object used to send requests to the API
    
    Asks for every page of issues and returns a list with every issue
    in the repository.
     
    [OUT] issues <map<Issue>>: List of issues (Issue), each one with its number, title
                    body, labels and url.


In [6]:
# TODO: Use PEP257
help(process_issues)


Help on function process_issues in module __main__:

process_issues(issues_page)
    [IN] issues_page <list>: List from the response (json) given by the API
    
    Filters PRs and create an Issue object from each dictionary returned by the API
    
    [OUT] processed_issues<map<Issue>>: A map object with every issue (Issue), ignoring PRs


In [7]:
# Retrieveing issues from a repository
issues_repo_demo = get_issues_for_repository(Repository(owner="aaossa", repo="aaossa.github.io"))

for issue in issues_repo_demo:
    print("#{number} - {title}".format(number=issue.number, title=issue.title))


#33 - Fix contrast
#32 - Avoid empty elements
#31 - Complete projects descriptions
#30 - Responsive footer
#27 - Translations
#26 - Markdown style
#25 - Improve header
#23 - Improve syntax highlighting
#22 - Self hosted git server
#21 - Load Disqus on demand
#20 - Icons animation on hover new code
#19 - Add descriptions in contact page
#18 - Organize .css and .scss files
#17 - Google Analytics
#16 - Design?... pls
#15 - Improve contact page
#14 - Links to social media websites
#13 - Months language
#12 - Footer
#11 - Enable Disqus comments
#10 - Load MathJax over HTTPS
#9 - Initial entry
#8 - Style
#7 - LaTeX/MathJax support
#6 - Mobile support
#5 - Useful reading
#4 - Add other pages
#3 - Multilingual website
#2 - Icon
#1 - Customize Jekyll's url

In [8]:
# Each Issue object contains only the relevant information
issue_demo = issues_repo_demo[0]

output = """\
#{number} - {title}
[{labels}]

{body}

Link: {url}"""

print(output.format(
        number=issue_demo.number, 
        title=issue_demo.title, 
        labels=", ".join(issue_demo.labels), 
        body=issue_demo.body, 
        url=issue_demo.url))


#33 - Fix contrast
[Enhancement, Priority (short-term)]

[Contrast Rebellion](http://contrastrebellion.com/)


Link: https://api.github.com/repos/aaossa/aaossa.github.io/issues/33

In [ ]: